/* This file performs the simulations discussed in Appendix D in 
   "How Wide is the Firm Border?" We write four programs which differ only in 
   in the number of establishments (given, here, by the "id" variable ) or the 
   number of observations per establishment (id variable). The four programs 
   (in order) correspond to columns 1/2 vs. columns 3/4 vs. columns 5/6 vs. 
   columns 7/8. */

clear all
set seed 1

program poi_sim_1, rclass
	set more off
	drop _all
	set obs 100000	
	gen id=floor((_n-1)/200)
	bys id: gen fe=invnormal(runiform()) if _n==1
	by id: replace fe=fe[1]
	gen x2=invnormal(runiform())
	gen iv=invnormal(runiform())
	gen e2=invnormal(runiform())
	gen x1=2*iv+0.3*x2+e2+5*fe   /* In the paper, this is the expression for s_{z,i^e} */
	gen e=invnormal(runiform())
	gen e1=0.2*e2+e  /* In the paper, this is the expression for \epsilon_{z,i^e} */ 
	gen xb=0.01*x1+0.04*x2+3*fe /* the next three lines compute what is pi_{z,i^e} in the paper */
	gen exb=exp(xb+e1)
	gen y=rpoisson(exb)

	xtset id
	/* Perform the Panel A regressions */
	xtpoisson y x1 x2, fe iterate(300)
		return scalar sx1=_b[x1]
		return scalar sx2=_b[x2]
	
	
	xtset id
	/* Perform the Panel B regressions */
	xtreg x1 iv x2, fe
		return scalar sx7=_b[iv]
		return scalar sx8=_b[x2]
	predict xbhat, xb
	gen error=x1-xbhat
	xtpoisson y x1 x2 error, fe iterate(300)

		return scalar sx3=_b[x1]
		return scalar sx4=_b[x2]
	
	sum iv 
	gen iv_dev=iv-r(mean)	
	sum x2
	gen x2_dev=x2-r(mean)	
	/* Perform the Panel C estimation. */
	noi gmm gmm_poiend, nequations(1) vce(cluster id) onestep ///
	parameters(y:x1 y:x2) instruments(iv_dev x2_dev, noconstant) conv_maxiter(150)
	matrix B=e(b)
		return scalar sx5=B[1,1]
		return scalar sx6=B[1,2]
end

program poi_sim_2, rclass
	set more off
	drop _all
	set obs 200000	
	gen id=floor((_n-1)/400)
	bys id: gen fe=invnormal(runiform()) if _n==1
	by id: replace fe=fe[1]
	gen x2=invnormal(runiform())
	gen iv=invnormal(runiform())
	gen e2=invnormal(runiform())
	gen x1=2*iv+0.3*x2+e2+5*fe
	gen e=invnormal(runiform())
	gen e1=0.2*e2+e
	gen xb=0.01*x1+0.04*x2+3*fe
	gen exb=exp(xb+e1)
	gen y=rpoisson(exb)

	xtset id
	xtpoisson y x1 x2, fe iterate(300)
		return scalar sx1=_b[x1]
		return scalar sx2=_b[x2]
	
	
	xtset id
	xtreg x1 iv x2, fe
		return scalar sx7=_b[iv]
		return scalar sx8=_b[x2]
	predict xbhat, xb
	gen error=x1-xbhat
	xtpoisson y x1 x2 error, fe iterate(300)

		return scalar sx3=_b[x1]
		return scalar sx4=_b[x2]
	
	sum iv 
	gen iv_dev=iv-r(mean)	
	sum x2
	gen x2_dev=x2-r(mean)	

	noi gmm gmm_poiend, nequations(1) vce(cluster id) onestep ///
	parameters(y:x1 y:x2) instruments(iv_dev x2_dev, noconstant) conv_maxiter(150)
	matrix B=e(b)
		return scalar sx5=B[1,1]
		return scalar sx6=B[1,2]
end

program poi_sim_3, rclass
	set more off
	drop _all
	set obs 200000	
	gen id=floor((_n-1)/200)
	bys id: gen fe=invnormal(runiform()) if _n==1
	by id: replace fe=fe[1]
	gen x2=invnormal(runiform())
	gen iv=invnormal(runiform())
	gen e2=invnormal(runiform())
	gen x1=2*iv+0.3*x2+e2+5*fe
	gen e=invnormal(runiform())
	gen e1=0.2*e2+e
	gen xb=0.01*x1+0.04*x2+3*fe
	gen exb=exp(xb+e1)
	gen y=rpoisson(exb)

	xtset id
	xtpoisson y x1 x2, fe iterate(300)
		return scalar sx1=_b[x1]
		return scalar sx2=_b[x2]
	
	
	xtset id
	xtreg x1 iv x2, fe
		return scalar sx7=_b[iv]
		return scalar sx8=_b[x2]
	predict xbhat, xb
	gen error=x1-xbhat
	xtpoisson y x1 x2 error, fe iterate(300)

		return scalar sx3=_b[x1]
		return scalar sx4=_b[x2]
	
	sum iv 
	gen iv_dev=iv-r(mean)	
	sum x2
	gen x2_dev=x2-r(mean)	

	noi gmm gmm_poiend, nequations(1) vce(cluster id) onestep ///
	parameters(y:x1 y:x2) instruments(iv_dev x2_dev, noconstant) conv_maxiter(150)
	matrix B=e(b)
		return scalar sx5=B[1,1]
		return scalar sx6=B[1,2]
end

program poi_sim_4, rclass
	set more off
	drop _all
	set obs 400000	
	gen id=floor((_n-1)/400)
	bys id: gen fe=invnormal(runiform()) if _n==1
	by id: replace fe=fe[1]
	gen x2=invnormal(runiform())
	gen iv=invnormal(runiform())
	gen e2=invnormal(runiform())
	gen x1=2*iv+0.3*x2+e2+5*fe
	gen e=invnormal(runiform())
	gen e1=0.2*e2+e
	gen xb=0.01*x1+0.04*x2+3*fe
	gen exb=exp(xb+e1)
	gen y=rpoisson(exb)

	xtset id
	xtpoisson y x1 x2, fe iterate(300)
		return scalar sx1=_b[x1]
		return scalar sx2=_b[x2]
	
	
	xtset id
	xtreg x1 iv x2, fe
		return scalar sx7=_b[iv]
		return scalar sx8=_b[x2]
	predict xbhat, xb
	gen error=x1-xbhat
	xtpoisson y x1 x2 error, fe iterate(300)

		return scalar sx3=_b[x1]
		return scalar sx4=_b[x2]
	
	sum iv 
	gen iv_dev=iv-r(mean)	
	sum x2
	gen x2_dev=x2-r(mean)	

	noi gmm gmm_poiend, nequations(1) vce(cluster id) onestep ///
	parameters(y:x1 y:x2) instruments(iv_dev x2_dev, noconstant) conv_maxiter(150)
	matrix B=e(b)
		return scalar sx5=B[1,1]
		return scalar sx6=B[1,2]
end

log using gmmiv_log2, text replace

simulate mx1=r(sx1) mx2=r(sx2) mx3=r(sx3) mx4=r(sx4) mx5=r(sx5) mx6=r(sx6) mx7=r(sx7) mx8=r(sx8), reps(1000): poi_sim_4
summarize

simulate mx1=r(sx1) mx2=r(sx2) mx3=r(sx3) mx4=r(sx4) mx5=r(sx5) mx6=r(sx6) mx7=r(sx7) mx8=r(sx8), reps(1000): poi_sim_3
summarize

simulate mx1=r(sx1) mx2=r(sx2) mx3=r(sx3) mx4=r(sx4) mx5=r(sx5) mx6=r(sx6) mx7=r(sx7) mx8=r(sx8), reps(1000): poi_sim_2
summarize

simulate mx1=r(sx1) mx2=r(sx2) mx3=r(sx3) mx4=r(sx4) mx5=r(sx5) mx6=r(sx6) mx7=r(sx7) mx8=r(sx8), reps(1000): poi_sim_1
summarize

log close
